require.js helps to convert the whole notebook to an HTML file
%%HTML
<script src="require.js"></script>
Importing all the libraries needed
import csv
import os
import re
import time
import urllib
import urllib.parse
import zipfile
from urllib.request import urlopen

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio
import plotly.offline as py
import seaborn as sns
from bs4 import BeautifulSoup
from kaggle.api.kaggle_api_extended import KaggleApi
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
# Authenticate against Kaggle once; the download cells reuse this client.
api = KaggleApi()
api.authenticate()

# Plot defaults: notebook renderer for plotly and a seaborn theme.
pio.renderers.default = 'notebook'
sns.set(context='talk', style='white', palette='Set3')
Displaying the first 5 rows of Marvel Dataset
# Refresh the local Marvel table: discard any earlier copy, download the
# archive from Kaggle, unpack it and keep the CSV under the name marvel.csv.
marvel_raw = "marvel-wikia-data.csv"
marvel_zip = "marvel-wikia-data.csv.zip"
if os.path.exists("marvel.csv"):
    os.remove("marvel.csv")
api.dataset_download_file(
    'fivethirtyeight/fivethirtyeight-comic-characters-dataset',
    marvel_raw,
)
with zipfile.ZipFile(marvel_zip, 'r') as zip_ref:
    zip_ref.extractall()
os.rename(marvel_raw, "marvel.csv")
os.remove(marvel_zip)

marvel = pd.read_csv("marvel.csv")
marvel.head()
| page_id | name | urlslug | ID | ALIGN | EYE | HAIR | SEX | GSM | ALIVE | APPEARANCES | FIRST APPEARANCE | Year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1678 | Spider-Man (Peter Parker) | \/Spider-Man_(Peter_Parker) | Secret Identity | Good Characters | Hazel Eyes | Brown Hair | Male Characters | NaN | Living Characters | 4043.0 | Aug-62 | 1962.0 |
| 1 | 7139 | Captain America (Steven Rogers) | \/Captain_America_(Steven_Rogers) | Public Identity | Good Characters | Blue Eyes | White Hair | Male Characters | NaN | Living Characters | 3360.0 | Mar-41 | 1941.0 |
| 2 | 64786 | Wolverine (James \"Logan\" Howlett) | \/Wolverine_(James_%22Logan%22_Howlett) | Public Identity | Neutral Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 3061.0 | Oct-74 | 1974.0 |
| 3 | 1868 | Iron Man (Anthony \"Tony\" Stark) | \/Iron_Man_(Anthony_%22Tony%22_Stark) | Public Identity | Good Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 2961.0 | Mar-63 | 1963.0 |
| 4 | 2460 | Thor (Thor Odinson) | \/Thor_(Thor_Odinson) | No Dual Identity | Good Characters | Blue Eyes | Blond Hair | Male Characters | NaN | Living Characters | 2258.0 | Nov-50 | 1950.0 |
Displaying the first 5 rows of DC Dataset
# Refresh the local DC table: discard any earlier copy, download the archive
# from Kaggle, unpack it and keep the CSV under the name dc.csv.
dc_raw = "dc-wikia-data.csv"
dc_zip = "dc-wikia-data.csv.zip"
if os.path.exists("dc.csv"):
    os.remove("dc.csv")
api.dataset_download_file(
    'fivethirtyeight/fivethirtyeight-comic-characters-dataset',
    dc_raw,
)
with zipfile.ZipFile(dc_zip, 'r') as zip_ref:
    zip_ref.extractall()
os.rename(dc_raw, "dc.csv")
os.remove(dc_zip)

dc = pd.read_csv("dc.csv")
dc.head()
| page_id | name | urlslug | ID | ALIGN | EYE | HAIR | SEX | GSM | ALIVE | APPEARANCES | FIRST APPEARANCE | YEAR | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1422 | Batman (Bruce Wayne) | \/wiki\/Batman_(Bruce_Wayne) | Secret Identity | Good Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 3093.0 | 1939, May | 1939.0 |
| 1 | 23387 | Superman (Clark Kent) | \/wiki\/Superman_(Clark_Kent) | Secret Identity | Good Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 2496.0 | 1986, October | 1986.0 |
| 2 | 1458 | Green Lantern (Hal Jordan) | \/wiki\/Green_Lantern_(Hal_Jordan) | Secret Identity | Good Characters | Brown Eyes | Brown Hair | Male Characters | NaN | Living Characters | 1565.0 | 1959, October | 1959.0 |
| 3 | 1659 | James Gordon (New Earth) | \/wiki\/James_Gordon_(New_Earth) | Public Identity | Good Characters | Brown Eyes | White Hair | Male Characters | NaN | Living Characters | 1316.0 | 1987, February | 1987.0 |
| 4 | 1576 | Richard Grayson (New Earth) | \/wiki\/Richard_Grayson_(New_Earth) | Secret Identity | Good Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 1237.0 | 1940, April | 1940.0 |
# Side-by-side pies of the SEX column: DC in grid column 0, Marvel in column 1.
sex_counts = [
    ('DC', dc['SEX'].value_counts()),
    ('Marvel', marvel['SEX'].value_counts()),
]
sex_pies = [
    go.Pie(labels=counts.index, values=counts.values, name=label, domain={"column": position})
    for position, (label, counts) in enumerate(sex_counts)
]
sex_layout = go.Layout(
    title='Gender Distributions of DC and Marvel Respectively',
    width=750,
    grid={"rows": 1, "columns": 2},
)
py.iplot(go.Figure(data=sex_pies, layout=sex_layout))
In both Marvel and DC, the majority of the characters are male
# Bar chart of the ALIGN column counts, one trace per publisher.
align_bars = [
    go.Bar(x=counts.index, y=counts.values, name=label, marker=dict(color=colour))
    for label, counts, colour in (
        ('DC', dc['ALIGN'].value_counts(), 'rgb(255,163,63)'),
        ('Marvel', marvel['ALIGN'].value_counts(), 'rgb(146,205,40)'),
    )
]
align_layout = go.Layout(title='Character Distributions', width=600)
py.iplot(go.Figure(data=align_bars, layout=align_layout))
Both universes keep mostly the same main heroes, while the villains keep rotating as they are defeated by the heroes; as a result, villains outnumber heroes.
# Donut charts of the ID column, each with its publisher name as an annotation.
id_counts = [
    ('DC', dc['ID'].value_counts()),
    ('Marvel', marvel['ID'].value_counts()),
]
id_pies = [
    go.Pie(labels=counts.index, values=counts.values, name=label,
           domain={"column": position}, hole=0.3)
    for position, (label, counts) in enumerate(id_counts)
]
id_layout = go.Layout(
    title='Identity Distributions',
    width=900,
    grid={"rows": 1, "columns": 2},
    annotations=[
        {"font": {"size": 20}, "showarrow": False, "text": text, "x": x_pos, "y": 0.5}
        for text, x_pos in (("DC", 0.21), ("Marvel", 0.82))
    ],
)
py.iplot(go.Figure(data=id_pies, layout=id_layout))
Most of the characters like to have a Secret Identity because they don't want their adventurous life to clash with their day-to-day work
# Stacked bars of the ALIVE column counts for both publishers.
alive_bars = [
    go.Bar(x=counts.index, y=counts.values, name=label, marker=dict(color=colour))
    for label, counts, colour in (
        ('DC', dc['ALIVE'].value_counts(), 'rgb(20,61,89)'),
        ('Marvel', marvel['ALIVE'].value_counts(), 'rgb(244,180,26)'),
    )
]
alive_layout = go.Layout(title='Mortality Distribution', width=600, barmode='stack')
py.iplot(go.Figure(data=alive_bars, layout=alive_layout))
# Appearances against debut year for both publishers.
#
# The x axis below is declared as a date axis, so the numeric year columns are
# converted to timestamps first: plain numbers on a date axis are read as
# milliseconds since 1970, which would collapse every point onto one instant
# and make the 1Y / 6Y range buttons meaningless.
# Rows without a year cannot be placed on the axis and are left out.
marvel_dated = marvel.dropna(subset=['Year'])
dc_dated = dc.dropna(subset=['YEAR'])
marvel_years = pd.to_datetime(marvel_dated['Year'].astype(int).astype(str), format='%Y')
dc_years = pd.to_datetime(dc_dated['YEAR'].astype(int).astype(str), format='%Y')

hightrace = go.Scatter(
    x=marvel_years,
    y=marvel_dated['APPEARANCES'],
    name="Marvel Appearances",
    line=dict(color='#17BECF'),
    opacity=0.8,
)
lowtrace = go.Scatter(
    x=dc_years,
    y=dc_dated['APPEARANCES'],
    name="DC Appearances",
    line=dict(color='#7F7F7F'),
    opacity=0.8,
)
layout = dict(
    title='Appearances with respect to Origin year',
    width=750,
    xaxis=dict(
        rangeselector=dict(buttons=[
            dict(count=1, label='1Y', step='year', stepmode='backward'),
            dict(count=6, label='6Y', step='year', stepmode='backward'),
            dict(step='all'),
        ]),
        rangeslider=dict(visible=True),
        type='date',
    ),
)
py.iplot(dict(data=[hightrace, lowtrace], layout=layout))
# Give every DC row the same hub label, keep the rows with index labels up
# to 2, and draw the graph that links the hub label to those names.
dc['comics'] = 'Top 3 DC Characters'
dctop3 = dc.loc[:2]
FG = nx.from_pandas_edgelist(
    dctop3,
    source='comics',
    target='name',
    edge_attr=True,
)
nx.draw_networkx(FG, with_labels=True)
Very predictable result
# Give every Marvel row the same hub label, keep the rows with index labels
# up to 2, and draw the graph that links the hub label to those names.
marvel['comics'] = 'Top 3 Marvel Characters'
marveltop3 = marvel.loc[:2]
FG = nx.from_pandas_edgelist(
    marveltop3,
    source='comics',
    target='name',
    edge_attr=True,
)
nx.draw_networkx(FG, with_labels=True)
Surprisingly, Iron Man (Tony Stark) is not present in the Top 3 Marvel Characters according to the data extracted from the internet.
# Remove the helper column added for the network plots.  errors="ignore"
# keeps this cell re-runnable once the column is already gone.
marvel = marvel.drop(columns="comics", errors="ignore")
dc = dc.drop(columns="comics", errors="ignore")

# First ten good and first ten bad characters of each table.  The explicit
# .copy() makes every selection an independent frame, so clean() can rewrite
# its columns without pandas raising SettingWithCopyWarning.
marvelheroes = marvel.loc[marvel['ALIGN'] == "Good Characters"].head(10).copy()
marvelvillains = marvel.loc[marvel['ALIGN'] == "Bad Characters"].head(10).copy()
dcheroes = dc.loc[dc['ALIGN'] == "Good Characters"].head(10).copy()
dcvillains = dc.loc[dc['ALIGN'] == "Bad Characters"].head(10).copy()
def clean(x):
    """Shorten the label columns of a character table in place.

    The ``name`` column keeps only the text before the first ``(``, with
    surrounding whitespace removed (``'Batman (Bruce Wayne)'`` -> ``'Batman'``).
    In each of ``ID``, ``ALIGN``, ``EYE``, ``HAIR``, ``SEX`` and ``ALIVE``,
    missing values become ``'Unknown'`` and every value is reduced to its
    first space-separated word (``'Blue Eyes'`` -> ``'Blue'``).

    Args:
        x: DataFrame containing the columns listed above; modified in place.

    Returns:
        None.
    """
    # Column access rather than ``x.name = ...``: attribute assignment only
    # reaches the column when it already exists.  The .str methods pass
    # missing names through instead of raising, and strip() drops the space
    # that precedes the opening parenthesis.
    x['name'] = x['name'].str.split('(').str[0].str.strip()
    for column in ('ID', 'ALIGN', 'EYE', 'HAIR', 'SEX', 'ALIVE'):
        labels = x[column].fillna('Unknown').astype(str)
        x[column] = labels.str.split(' ').str[0]
# Tidy the labels of all four hero/villain selections in place.
for selection in (marvelheroes, marvelvillains, dcheroes, dcvillains):
    clean(selection)
# One wide figure with two panels: DC heroes on the left, DC villains on the right.
dc_fig, (dc_hero_ax, dc_villain_ax) = plt.subplots(1, 2, figsize=(18, 6))
dc_fig.subplots_adjust(wspace=0.3)

sns.boxenplot(x='APPEARANCES', y='HAIR', hue='EYE', data=dcheroes, ax=dc_hero_ax)
dc_hero_ax.set_title('Top Appearance of Heroes in DC')

sns.boxenplot(x='APPEARANCES', y='HAIR', hue='EYE', data=dcvillains, ax=dc_villain_ax)
dc_villain_ax.set_title('Top Appearance of Villains in DC')
Text(0.5, 1.0, 'Top Appearance of Villains in DC')
In DC :- \ Heroes mostly have black hair with blue eyes and sometimes blond hair with brown eyes \ Villains mostly have red eyes with any hair color or sometimes brown hair with blue eyes
# One wide figure with two panels: Marvel heroes on the left, Marvel villains on the right.
marvel_fig, (marvel_hero_ax, marvel_villain_ax) = plt.subplots(1, 2, figsize=(18, 6))
marvel_fig.subplots_adjust(wspace=0.3)

sns.boxenplot(x='APPEARANCES', y='HAIR', hue='EYE', data=marvelheroes, ax=marvel_hero_ax)
marvel_hero_ax.set_title('Top Appearance of Heroes in Marvel')

sns.boxenplot(x='APPEARANCES', y='HAIR', hue='EYE', data=marvelvillains, ax=marvel_villain_ax)
marvel_villain_ax.set_title('Top Appearance of Villians in Marvel')
Text(0.5, 1.0, 'Top Appearance of Villians in Marvel')
In Marvel :- \ Heroes mostly have blond hair with blue eyes and occasionally other appearances \ Villains don't have any specific appearance
# Alignment split among characters whose ID is "Secret Identity", per publisher.
dcsi = dc.loc[dc['ID'] == "Secret Identity"]
marvelsi = marvel.loc[marvel['ID'] == "Secret Identity"]

secret_pies = [
    go.Pie(labels=counts.index, values=counts.values, name=label,
           domain={"column": position}, hole=0.3)
    for position, (label, counts) in enumerate((
        ('DC', dcsi['ALIGN'].value_counts()),
        ('Marvel', marvelsi['ALIGN'].value_counts()),
    ))
]
secret_layout = go.Layout(
    title='Secret Identity',
    width=900,
    grid={"rows": 1, "columns": 2},
    annotations=[
        {"font": {"size": 20}, "showarrow": False, "text": text, "x": x_pos, "y": 0.5}
        for text, x_pos in (("DC", 0.21), ("Marvel", 0.82))
    ],
)
py.iplot(go.Figure(data=secret_pies, layout=secret_layout))
In both Marvel and DC, more of the Bad Characters have secret identities, which totally makes sense: if you are running from justice, you probably don’t want to give up your home address.
# Alignment split among characters marked "Deceased Characters", per publisher.
dcdead = dc.loc[dc['ALIVE'] == "Deceased Characters"]
marveldead = marvel.loc[marvel['ALIVE'] == "Deceased Characters"]

deceased_pies = [
    go.Pie(labels=counts.index, values=counts.values, name=label,
           domain={"column": position}, hole=0.3)
    for position, (label, counts) in enumerate((
        ('DC', dcdead['ALIGN'].value_counts()),
        ('Marvel', marveldead['ALIGN'].value_counts()),
    ))
]
deceased_layout = go.Layout(
    title='Deceased Characters',
    width=900,
    grid={"rows": 1, "columns": 2},
    annotations=[
        {"font": {"size": 20}, "showarrow": False, "text": text, "x": x_pos, "y": 0.5}
        for text, x_pos in (("DC", 0.21), ("Marvel", 0.82))
    ],
)
py.iplot(go.Figure(data=deceased_pies, layout=deceased_layout))
In Marvel, proportionally more villains are deceased than in DC \ That's why DC is darker than Marvel: proportionally more of its villains are still living
Transgender Characters in DC with their images extracted from the internet
# Show the first .jpg image found on the wiki page of every DC character
# whose SEX is "Transgender Characters".
dctc = dc.loc[dc['SEX'] == "Transgender Characters", 'name']
for character in dctc:
    print(character)
    # Percent-encode the page title so that names containing non-ASCII
    # characters, '?' or '#' still form the intended URL.
    dclink = 'https://dc.fandom.com/wiki/' + urllib.parse.quote(
        character.replace(" ", "_"), safe="/()'")
    with urlopen(dclink) as html:
        bs = BeautifulSoup(html, 'html.parser')
    # The dot is escaped so that only a literal ".jpg" matches.
    image_tag = bs.find('img', {'src': re.compile(r'\.jpg')})
    if image_tag is None:
        # find() returns None when the page has no matching <img>.
        print("No .jpg image found for", character)
        continue
    urllib.request.urlretrieve(image_tag['src'], "img.jpg")
    try:
        plt.imshow(mpimg.imread('img.jpg'))
        plt.axis('off')
        plt.show()
    finally:
        # Delete the temporary download even when rendering fails.
        os.remove("img.jpg")
Daystar (New Earth)
Genderfluid Characters in Marvel with their images extracted from the internet
# Show the first .jpg image found on the wiki page of every Marvel character
# whose SEX is "Genderfluid Characters".
marvelgc = marvel.loc[marvel['SEX'] == "Genderfluid Characters", 'name']
for character in marvelgc:
    print(character)
    # Percent-encode the page title so that names containing non-ASCII
    # characters, '?' or '#' still form the intended URL.
    marvellink = 'https://marvel.fandom.com/wiki/' + urllib.parse.quote(
        character.replace(" ", "_"), safe="/()'")
    with urlopen(marvellink) as html:
        bs = BeautifulSoup(html, 'html.parser')
    # The dot is escaped so that only a literal ".jpg" matches.
    image_tag = bs.find('img', {'src': re.compile(r'\.jpg')})
    if image_tag is None:
        # find() returns None when the page has no matching <img>.
        print("No .jpg image found for", character)
        continue
    urllib.request.urlretrieve(image_tag['src'], "img.jpg")
    try:
        plt.imshow(mpimg.imread('img.jpg'))
        plt.axis('off')
        plt.show()
    finally:
        # Delete the temporary download even when rendering fails.
        os.remove("img.jpg")
Loki Laufeyson (Earth-616)
Xavin (Earth-616)
Search for a DC Character
# Interactive lookup: list every DC name containing the query, let the user
# pick one by number, then show that character's wiki image and link.
dcnames = dc['name']
# Stored as `query` so that the builtin `str` stays usable in later cells.
query = input("Enter DC Character name that you are searching for : ").lower()
arr = [candidate for candidate in dcnames if query in candidate.lower()]

if not arr:
    print("No DC character matches that name")
else:
    print("Did you mean ? :")
    for co, candidate in enumerate(arr, start=1):
        print(co, candidate)
    time.sleep(1)
    try:
        ch = int(input("Enter Your Choice : "))
    except ValueError:
        ch = 0  # non-numeric input is reported as an invalid choice below
    if not 1 <= ch <= len(arr):
        # Without this check 0 would silently select the last match and
        # larger numbers would raise IndexError.
        print("Invalid choice; expected a number between 1 and", len(arr))
    else:
        dcname = arr[ch - 1]
        print("Name :", dcname)
        print("Image :- ")
        # Percent-encode the page title so that names containing non-ASCII
        # characters, '?' or '#' still form the intended URL.
        dclink = 'https://dc.fandom.com/wiki/' + urllib.parse.quote(
            dcname.replace(" ", "_"), safe="/()'")
        with urlopen(dclink) as html:
            bs = BeautifulSoup(html, 'html.parser')
        # The dot is escaped so that only a literal ".jpg" matches.
        image_tag = bs.find('img', {'src': re.compile(r'\.jpg')})
        if image_tag is None:
            print("No .jpg image found for", dcname)
        else:
            urllib.request.urlretrieve(image_tag['src'], "img.jpg")
            try:
                plt.imshow(mpimg.imread('img.jpg'))
                plt.axis('off')
                plt.show()
            finally:
                # Delete the temporary download even when rendering fails.
                os.remove("img.jpg")
        print("Find out more about this DC character here : ", dclink)
Did you mean ? : 1 Batman (Bruce Wayne) 2 Bruce Gordon (New Earth) 3 Bruce Wayne Clone (New Earth) 4 Mighty Bruce (New Earth) 5 William Bruce Smith (New Earth) Name : Bruce Gordon (New Earth) Image :-
Find out more about this DC character here : https://dc.fandom.com/wiki/Bruce_Gordon_(New_Earth)
Search for a Marvel Character
# Interactive lookup: list every Marvel name containing the query, let the
# user pick one by number, then show that character's wiki image and link.
marvelnames = marvel['name']
# Stored as `query` so that the builtin `str` stays usable in later cells.
query = input("Enter Marvel Character name that you are searching for : ").lower()
arr = [candidate for candidate in marvelnames if query in candidate.lower()]

if not arr:
    print("No Marvel character matches that name")
else:
    print("Did you mean ? :")
    for co, candidate in enumerate(arr, start=1):
        print(co, candidate)
    time.sleep(1)
    try:
        ch = int(input("Enter Your Choice : "))
    except ValueError:
        ch = 0  # non-numeric input is reported as an invalid choice below
    if not 1 <= ch <= len(arr):
        # Without this check 0 would silently select the last match and
        # larger numbers would raise IndexError.
        print("Invalid choice; expected a number between 1 and", len(arr))
    else:
        marvelname = arr[ch - 1]
        print("Name :", marvelname)
        print("Image :- ")
        # Percent-encode the page title so that names containing non-ASCII
        # characters, '?' or '#' still form the intended URL.
        marvellink = 'https://marvel.fandom.com/wiki/' + urllib.parse.quote(
            marvelname.replace(" ", "_"), safe="/()'")
        with urlopen(marvellink) as html:
            bs = BeautifulSoup(html, 'html.parser')
        # The dot is escaped so that only a literal ".jpg" matches.
        image_tag = bs.find('img', {'src': re.compile(r'\.jpg')})
        if image_tag is None:
            print("No .jpg image found for", marvelname)
        else:
            urllib.request.urlretrieve(image_tag['src'], "img.jpg")
            try:
                plt.imshow(mpimg.imread('img.jpg'))
                plt.axis('off')
                plt.show()
            finally:
                # Delete the temporary download even when rendering fails.
                os.remove("img.jpg")
        print("Find out more about this Marvel character here : ", marvellink)
Did you mean ? : 1 Vision (Earth-616) 2 Vision (Jonas) (Earth-616) 3 Vision (Skrull) (Earth-616) 4 Vision (Doppelganger) (Earth-616) 5 Vision (Taskmaster Robot) (Earth-616) 6 Vision (Onslaught Reborn) (Earth-616) 7 Anitun (Triumph Division's second incarnation) (Earth-616) 8 Great Mongoose (Triumph Division's second incarnation) (Earth-616) 9 Mighty Mother (Triumph Division's second incarnation) (Earth-616) 10 Red Feather (Triumph Division's second incarnation) (Earth-616) 11 St. George (Triumph Division's second incarnation) (Earth-616) 12 Wishing Man (Triumph Division's second incarnation) (Earth-616) 13 Fighter One (Triumph Division's second incarnation) (Earth-616) Name : Vision (Jonas) (Earth-616) Image :-
Find out more about this Marvel character here : https://marvel.fandom.com/wiki/Vision_(Jonas)_(Earth-616)
Let's see If you know this random DC character
# Pick one random DC row and show that character's name, wiki image and link.
dcrand = dc.sample()['name']
for character in dcrand:
    print("Name : ", character)
    # Percent-encode the page title so that names containing non-ASCII
    # characters, '?' or '#' still form the intended URL.
    dclink = 'https://dc.fandom.com/wiki/' + urllib.parse.quote(
        character.replace(" ", "_"), safe="/()'")
    with urlopen(dclink) as html:
        bs = BeautifulSoup(html, 'html.parser')
    # The dot is escaped so that only a literal ".jpg" matches.
    image_tag = bs.find('img', {'src': re.compile(r'\.jpg')})
    if image_tag is None:
        # find() returns None when the page has no matching <img>.
        print("No .jpg image found for", character)
    else:
        urllib.request.urlretrieve(image_tag['src'], "img.jpg")
        try:
            plt.imshow(mpimg.imread('img.jpg'))
            plt.axis('off')
            plt.show()
        finally:
            # Delete the temporary download even when rendering fails.
            os.remove("img.jpg")
    print("Find out more about this DC character here : ", dclink)
Name : Ch'p (New Earth)
Find out more about this DC character here : https://dc.fandom.com/wiki/Ch'p_(New_Earth)
Let's see If you know this random Marvel character
# Pick one random Marvel row and show that character's name, wiki image and link.
marvelrand = marvel.sample()['name']
for character in marvelrand:
    print("Name : ", character)
    # Percent-encode the page title so that names containing non-ASCII
    # characters, '?' or '#' still form the intended URL.
    marvellink = 'https://marvel.fandom.com/wiki/' + urllib.parse.quote(
        character.replace(" ", "_"), safe="/()'")
    with urlopen(marvellink) as html:
        bs = BeautifulSoup(html, 'html.parser')
    # The dot is escaped so that only a literal ".jpg" matches.
    image_tag = bs.find('img', {'src': re.compile(r'\.jpg')})
    if image_tag is None:
        # find() returns None when the page has no matching <img>.
        print("No .jpg image found for", character)
    else:
        urllib.request.urlretrieve(image_tag['src'], "img.jpg")
        try:
            plt.imshow(mpimg.imread('img.jpg'))
            plt.axis('off')
            plt.show()
        finally:
            # Delete the temporary download even when rendering fails.
            os.remove("img.jpg")
    print("Find out more about this Marvel character here : ", marvellink)
Name : Glynis Oliver (Earth-616)
Find out more about this Marvel character here : https://marvel.fandom.com/wiki/Glynis_Oliver_(Earth-616)
Dropping columns with useless data before applying algorithms
# Remove the columns that the modelling cells do not read, from both tables.
unused_columns = ['urlslug', 'EYE', 'HAIR', 'FIRST APPEARANCE', 'name']
for table in (dc, marvel):
    table.drop(columns=unused_columns, inplace=True)
Some Machine Learning Algorithms applied on DC Characters
# Encode the categorical DC columns as integers.  Each encoded column is
# assigned back to the frame: calling replace(..., inplace=True) on dc[col]
# acts on an intermediate object and leaves dc unchanged when pandas
# copy-on-write is active.
dc_codes = {
    'ALIGN': {'Good Characters': 2, 'Bad Characters': 1,
              'Neutral Characters': 3, 'Reformed Criminals': 4},
    'SEX': {'Male Characters': 2, 'Female Characters': 1,
            'Genderless Characters': 3, 'Transgender Characters': 4},
    'ALIVE': {'Living Characters': 1, 'Deceased Characters': 0},
    'ID': {'Secret Identity': 2, 'Public Identity': 1, 'Identity Unknown': 3},
    'GSM': {'Bisexual Characters': 1, 'Homosexual Characters': 2},
}
for column, codes in dc_codes.items():
    dc[column] = dc[column].replace(codes)

# NOTE(review): zero-filling also turns a missing ALIVE into 0 (the code for
# deceased) and a missing YEAR into 0 before scaling - confirm this is intended.
dc.replace(np.nan, 0, inplace=True)
dc['ALIVE'] = dc['ALIVE'].astype(int)

# Rescale the features to [0, 1].  MinMaxScaler scales each column
# independently, so a single call covers all seven columns.
dc_scaled_columns = ['GSM', 'ID', 'SEX', 'ALIGN', 'page_id', 'APPEARANCES', 'YEAR']
min_max_scaler = preprocessing.MinMaxScaler()
dc[dc_scaled_columns] = min_max_scaler.fit_transform(dc[dc_scaled_columns])

print("Co-relation between mortality of characters and their non-hetrosexual preferences = ",dc['ALIVE'].corr(dc['GSM']))

# Predict ALIVE from the four encoded attributes with a 70/30 split.
trainX, testX, trainY, testY = train_test_split(
    dc[['GSM', 'SEX', 'ALIGN', 'ID']], dc['ALIVE'], test_size=0.3)

dtc = DecisionTreeClassifier()
lr = LogisticRegressionCV()
kNN = KNeighborsClassifier()
for label, model in (
    ("Decision Tree Classifier", dtc),
    ("Logistic Regression CV", lr),
    ("K Neighbors Classifier", kNN),
):
    model.fit(trainX, trainY)
    accuracy = model.score(testX, testY)
    print(label + " Accuracy = ", accuracy)
Co-relation between mortality of characters and their non-hetrosexual preferences = 0.02993401630973803 Decision Tree Classifier Accuracy = 0.7689705171580473 Logistic Regression CV Accuracy = 0.7684871918801354 K Neighbors Classifier Accuracy = 0.7109714838086032
Some Machine Learning Algorithms applied on Marvel Characters
# Encode the categorical Marvel columns as integers.  Each encoded column is
# assigned back to the frame: calling replace(..., inplace=True) on
# marvel[col] acts on an intermediate object and leaves marvel unchanged when
# pandas copy-on-write is active.
marvel_codes = {
    'ALIGN': {'Good Characters': 2, 'Bad Characters': 1, 'Neutral Characters': 3},
    'SEX': {'Male Characters': 2, 'Female Characters': 1,
            'Genderfluid Characters': 3, 'Agender Characters': 4},
    'ALIVE': {'Living Characters': 1, 'Deceased Characters': 0},
    'ID': {'Secret Identity': 2, 'Public Identity': 1,
           'No Dual Identity': 3, 'Known to Authorities Identity': 4},
    'GSM': {'Bisexual Characters': 1, 'Transvestites': 2,
            'Homosexual Characters': 3, 'Pansexual Characters': 4,
            'Transgender Characters': 5, 'Genderfluid Characters': 6},
}
for column, codes in marvel_codes.items():
    marvel[column] = marvel[column].replace(codes)

# NOTE(review): zero-filling also turns a missing ALIVE into 0 (the code for
# deceased) and a missing Year into 0 before scaling - confirm this is intended.
marvel.replace(np.nan, 0, inplace=True)
marvel['ALIVE'] = marvel['ALIVE'].astype(int)

# Rescale the features to [0, 1].  MinMaxScaler scales each column
# independently, so a single call covers all seven columns.
marvel_scaled_columns = ['GSM', 'ID', 'SEX', 'ALIGN', 'page_id', 'APPEARANCES', 'Year']
min_max_scaler = preprocessing.MinMaxScaler()
marvel[marvel_scaled_columns] = min_max_scaler.fit_transform(marvel[marvel_scaled_columns])

print("Co-relation between mortality of characters and their non-hetrosexual preferences = ",marvel['ALIVE'].corr(marvel['GSM']))

# Predict ALIVE from the four encoded attributes with a 70/30 split.
trainX, testX, trainY, testY = train_test_split(
    marvel[['GSM', 'SEX', 'ALIGN', 'ID']], marvel['ALIVE'], test_size=0.3)

dtc = DecisionTreeClassifier()
lr = LogisticRegressionCV()
kNN = KNeighborsClassifier()
for label, model in (
    ("Decision Tree Classifier", dtc),
    ("Logistic Regression CV", lr),
    ("K Neighbors Classifier", kNN),
):
    model.fit(trainX, trainY)
    accuracy = model.score(testX, testY)
    print(label + " Accuracy = ", accuracy)
Co-relation between mortality of characters and their non-hetrosexual preferences = 0.0006886665922864184 Decision Tree Classifier Accuracy = 0.7799715041726033 Logistic Regression CV Accuracy = 0.7801750457968655 K Neighbors Classifier Accuracy = 0.6539792387543253